"""Subexpressions that make up a parsed grammar

These do the parsing.

"""
# TODO: Make sure all symbol refs are local--not class lookups or
# anything--for speed. And kill all the dots.

from inspect import getargspec
import re

from six import integer_types, python_2_unicode_compatible
from six.moves import range

from parsimonious.exceptions import ParseError, IncompleteParseError
from parsimonious.nodes import Node, RegexNode
from parsimonious.utils import StrAndRepr

MARKER = object()


def expression(callable, rule_name, grammar):
    """Turn a plain callable into an Expression.

    The callable can be of this simple form::

        def foo(text, pos):
            '''If this custom expression matches starting at text[pos], return
            the index where it stops matching. Otherwise, return None.'''
            if the expression matched:
                return end_pos

    If there child nodes to return, return a tuple::

        return end_pos, children

    If the expression doesn't match at the given ``pos`` at all... ::

        return None

    If your callable needs to make sub-calls to other rules in the grammar or
    do error reporting, it can take this form, gaining additional arguments::

        def foo(text, pos, cache, error, grammar):
            # Call out to other rules:
            node = grammar['another_rule'].match_core(text, pos, cache, error)
            ...
            # Return values as above.

    The return value of the callable, if an int or a tuple, will be
    automatically transmuted into a :class:`~parsimonious.Node`. If it returns
    a Node-like class directly, it will be passed through unchanged.

    :arg rule_name: The rule name to attach to the resulting
        :class:`~parsimonious.Expression`
    :arg grammar: The :class:`~parsimonious.Grammar` this expression will be a
        part of, to make delegating to other rules possible

    """
    num_args = len(getargspec(callable).args)
    if num_args == 2:
        is_simple = True
    elif num_args == 5:
        is_simple = False
    else:
        raise RuntimeError("Custom rule functions must take either 2 or 5 "
                           "arguments, not %s." % num_args)

    class AdHocExpression(Expression):
        def _uncached_match(self, text, pos, cache, error):
            result = (callable(text, pos) if is_simple else
                      callable(text, pos, cache, error, grammar))

            if isinstance(result, integer_types):
                end, children = result, None
            elif isinstance(result, tuple):
                end, children = result
            else:
                # Node or None
                return result
            return Node(self, text, pos, end, children=children)

        def _as_rhs(self):
            return '{custom function "%s"}' % callable.__name__

    return AdHocExpression(name=rule_name)


@python_2_unicode_compatible
class Expression(StrAndRepr):
    """A thing that can be matched against a piece of text"""

    # Slots are about twice as fast as __dict__-based attributes:
    # http://stackoverflow.com/questions/1336791/dictionary-vs-object-which-is-more-efficient-and-why

    # Top-level expressions--rules--have names. Subexpressions are named ''.
    __slots__ = ['name', 'identity_tuple']

    def __init__(self, name=''):
        self.name = name
        self.identity_tuple = (self.name, )

    def __hash__(self):
        return hash(self.identity_tuple)

    def __eq__(self, other):
        return isinstance(other, self.__class__) and self.identity_tuple == other.identity_tuple

    def __ne__(self, other):
        return not (self == other)

    def parse(self, text, pos=0):
        """Return a parse tree of ``text``.

        Raise ``ParseError`` if the expression wasn't satisfied. Raise
        ``IncompleteParseError`` if the expression was satisfied but didn't
        consume the full string.

        """
        node = self.match(text, pos=pos)
        if node.end < len(text):
            raise IncompleteParseError(text, node.end, self)
        return node

    def match(self, text, pos=0):
        """Return the parse tree matching this expression at the given
        position, not necessarily extending all the way to the end of ``text``.

        Raise ``ParseError`` if there is no match there.

        :arg pos: The index at which to start matching

        """
        error = ParseError(text)
        node = self.match_core(text, pos, {}, error)
        if node is None:
            raise error
        return node

    def match_core(self, text, pos, cache, error):
        """Internal guts of ``match()``

        This is appropriate to call only from custom rules or Expression
        subclasses.

        :arg cache: The packrat cache::

            {(oid, pos): Node tree matched by object `oid` at index `pos` ...}

        :arg error: A ParseError instance with ``text`` already filled in but
            otherwise blank. We update the error reporting info on this object
            as we go. (Sticking references on an existing instance is faster
            than allocating a new one for each expression that fails.) We
            return None rather than raising and catching ParseErrors because
            catching is slow.

        """
        # TODO: Optimize. Probably a hot spot.
        #
        # Is there a way of looking up cached stuff that's faster than hashing
        # this id-pos pair?
        #
        # If this is slow, think about the array module. It might (or might
        # not!) use more RAM, but it'll likely be faster than hashing things
        # all the time. Also, can we move all the allocs up front?
        #
        # To save space, we have lots of choices: (0) Quit caching whole Node
        # objects. Cache just what you need to reconstitute them. (1) Cache
        # only the results of entire rules, not subexpressions (probably a
        # horrible idea for rules that need to backtrack internally a lot). (2)
        # Age stuff out of the cache somehow. LRU? (3) Cuts.
        expr_id = id(self)
        node = cache.get((expr_id, pos), MARKER)  # TODO: Change to setdefault to prevent infinite recursion in left-recursive rules.
        if node is MARKER:
            node = cache[(expr_id, pos)] = self._uncached_match(text,
                                                                pos,
                                                                cache,
                                                                error)

        # Record progress for error reporting:
        if node is None and pos >= error.pos and (
                self.name or getattr(error.expr, 'name', None) is None):
            # Don't bother reporting on unnamed expressions (unless that's all
            # we've seen so far), as they're hard to track down for a human.
            # Perhaps we could include the unnamed subexpressions later as
            # auxiliary info.
            error.expr = self
            error.pos = pos

        return node

    def __str__(self):
        return u'<%s %s>' % (
            self.__class__.__name__,
            self.as_rule())

    def as_rule(self):
        """Return the left- and right-hand sides of a rule that represents me.

        Return unicode. If I have no ``name``, omit the left-hand side.

        """
        rhs = self._as_rhs().strip()
        if rhs.startswith('(') and rhs.endswith(')'):
            rhs = rhs[1:-1]

        return (u'%s = %s' % (self.name, rhs)) if self.name else rhs

    def _unicode_members(self):
        """Return an iterable of my unicode-represented children, stopping
        descent when we hit a named node so the returned value resembles the
        input rule."""
        return [(m.name or m._as_rhs()) for m in self.members]

    def _as_rhs(self):
        """Return the right-hand side of a rule that represents me.

        Implemented by subclasses.

        """
        raise NotImplementedError


class Literal(Expression):
    """A string literal

    Use these if you can; they're the fastest.

    """
    __slots__ = ['literal']

    def __init__(self, literal, name=''):
        super(Literal, self).__init__(name)
        self.literal = literal
        self.identity_tuple = (name, literal)

    def _uncached_match(self, text, pos, cache, error):
        if text.startswith(self.literal, pos):
            return Node(self, text, pos, pos + len(self.literal))

    def _as_rhs(self):
        # TODO: Get backslash escaping right.
        return '"%s"' % self.literal


class TokenMatcher(Literal):
    """An expression matching a single token of a given type

    This is for use only with TokenGrammars.

    """
    def _uncached_match(self, token_list, pos, cache, error):
        if token_list[pos].type == self.literal:
            return Node(self, token_list, pos, pos + 1)


class Regex(Expression):
    """An expression that matches what a regex does.

    Use these as much as you can and jam as much into each one as you can;
    they're fast.

    """
    __slots__ = ['re']

    def __init__(self, pattern, name='', ignore_case=False, locale=False,
                 multiline=False, dot_all=False, unicode=False, verbose=False):
        super(Regex, self).__init__(name)
        self.re = re.compile(pattern, (ignore_case and re.I) |
                                      (locale and re.L) |
                                      (multiline and re.M) |
                                      (dot_all and re.S) |
                                      (unicode and re.U) |
                                      (verbose and re.X))
        self.identity_tuple = (self.name, self.re)

    def _uncached_match(self, text, pos, cache, error):
        """Return length of match, ``None`` if no match."""
        m = self.re.match(text, pos)
        if m is not None:
            span = m.span()
            node = RegexNode(self, text, pos, pos + span[1] - span[0])
            node.match = m  # TODO: A terrible idea for cache size?
            return node

    def _regex_flags_from_bits(self, bits):
        """Return the textual equivalent of numerically encoded regex flags."""
        flags = 'ilmsux'
        return ''.join(flags[i - 1] if (1 << i) & bits else '' for i in range(1, len(flags) + 1))

    def _as_rhs(self):
        # TODO: Get backslash escaping right.
        return '~"%s"%s' % (self.re.pattern,
                            self._regex_flags_from_bits(self.re.flags))


class Compound(Expression):
    """An abstract expression which contains other expressions"""

    __slots__ = ['members']

    def __init__(self, *members, **kwargs):
        """``members`` is a sequence of expressions."""
        super(Compound, self).__init__(kwargs.get('name', ''))
        self.members = members

    def __hash__(self):
        # Note we leave members out of the hash computation, since compounds can get added to
        # sets, then have their members mutated. See RuleVisitor._resolve_refs.
        # Equality should still work, but we want the rules to go into the correct hash bucket.
        return hash((self.__class__, self.name))

    def __eq__(self, other):
        return (
            isinstance(other, self.__class__) and
            self.name == other.name and
            self.members == other.members)


class Sequence(Compound):
    """A series of expressions that must match contiguous, ordered pieces of
    the text

    In other words, it's a concatenation operator: each piece has to match, one
    after another.

    """
    def _uncached_match(self, text, pos, cache, error):
        new_pos = pos
        length_of_sequence = 0
        children = []
        for m in self.members:
            node = m.match_core(text, new_pos, cache, error)
            if node is None:
                return None
            children.append(node)
            length = node.end - node.start
            new_pos += length
            length_of_sequence += length
        # Hooray! We got through all the members!
        return Node(self, text, pos, pos + length_of_sequence, children)

    def _as_rhs(self):
        return u'({0})'.format(u' '.join(self._unicode_members()))


class OneOf(Compound):
    """A series of expressions, one of which must match

    Expressions are tested in order from first to last. The first to succeed
    wins.

    """
    def _uncached_match(self, text, pos, cache, error):
        for m in self.members:
            node = m.match_core(text, pos, cache, error)
            if node is not None:
                # Wrap the succeeding child in a node representing the OneOf:
                return Node(self, text, pos, node.end, children=[node])

    def _as_rhs(self):
        return u'({0})'.format(u' / '.join(self._unicode_members()))


class Lookahead(Compound):
    """An expression which consumes nothing, even if its contained expression
    succeeds"""

    # TODO: Merge this and Not for better cache hit ratios and less code.
    # Downside: pretty-printed grammars might be spelled differently than what
    # went in. That doesn't bother me.

    def _uncached_match(self, text, pos, cache, error):
        node = self.members[0].match_core(text, pos, cache, error)
        if node is not None:
            return Node(self, text, pos, pos)

    def _as_rhs(self):
        return u'&%s' % self._unicode_members()[0]


class Not(Compound):
    """An expression that succeeds only if the expression within it doesn't

    In any case, it never consumes any characters; it's a negative lookahead.

    """
    def _uncached_match(self, text, pos, cache, error):
        # FWIW, the implementation in Parsing Techniques in Figure 15.29 does
        # not bother to cache NOTs directly.
        node = self.members[0].match_core(text, pos, cache, error)
        if node is None:
            return Node(self, text, pos, pos)

    def _as_rhs(self):
        # TODO: Make sure this parenthesizes the member properly if it's an OR
        # or AND.
        return u'!%s' % self._unicode_members()[0]


# Quantifiers. None of these is strictly necessary, but they're darn handy.

class Optional(Compound):
    """An expression that succeeds whether or not the contained one does

    If the contained expression succeeds, it goes ahead and consumes what it
    consumes. Otherwise, it consumes nothing.

    """
    def _uncached_match(self, text, pos, cache, error):
        node = self.members[0].match_core(text, pos, cache, error)
        return (Node(self, text, pos, pos) if node is None else
                Node(self, text, pos, node.end, children=[node]))

    def _as_rhs(self):
        return u'%s?' % self._unicode_members()[0]


# TODO: Merge with OneOrMore.
class ZeroOrMore(Compound):
    """An expression wrapper like the * quantifier in regexes."""

    def _uncached_match(self, text, pos, cache, error):
        new_pos = pos
        children = []
        while True:
            node = self.members[0].match_core(text, new_pos, cache, error)
            if node is None or not (node.end - node.start):
                # Node was None or 0 length. 0 would otherwise loop infinitely.
                return Node(self, text, pos, new_pos, children)
            children.append(node)
            new_pos += node.end - node.start

    def _as_rhs(self):
        return u'%s*' % self._unicode_members()[0]


class OneOrMore(Compound):
    """An expression wrapper like the + quantifier in regexes.

    You can also pass in an alternate minimum to make this behave like "2 or
    more", "3 or more", etc.

    """
    __slots__ = ['min']

    # TODO: Add max. It should probably succeed if there are more than the max
    # --just not consume them.

    def __init__(self, member, name='', min=1):
        super(OneOrMore, self).__init__(member, name=name)
        self.min = min

    def _uncached_match(self, text, pos, cache, error):
        new_pos = pos
        children = []
        while True:
            node = self.members[0].match_core(text, new_pos, cache, error)
            if node is None:
                break
            children.append(node)
            length = node.end - node.start
            if length == 0:  # Don't loop infinitely.
                break
            new_pos += length
        if len(children) >= self.min:
            return Node(self, text, pos, new_pos, children)

    def _as_rhs(self):
        return u'%s+' % self._unicode_members()[0]
